Subset B cells
b_cells <- rownames(subset(metadata, metadata$annotation %in% c("NBC_MBC","PC","GCBC")))
b_clusters_obj = subset(seurat_obj,cells=b_cells)
Sub-clustering of B cell clusters
b_clusters_obj@meta.data %>%
ggplot(aes(UMAP1, UMAP2, color = annotation)) +
geom_point(size = 0.75) +
scale_color_manual(values = color) +
labs(x = "UMAP1", y = "UMAP2", color = "") +
theme_classic()

metadata <- b_clusters_obj@meta.data
df = count(metadata,metadata$annotation)
colnames(df) = c("Cluster","Number_of_cells")
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
kable(df) %>%
kable_styling("striped", full_width = T)
|
Cluster
|
Number_of_cells
|
|
GCBC
|
15134
|
|
NBC_MBC
|
13931
|
|
PC
|
1974
|
ADT and RNA multimodal analysis
DefaultAssay(b_clusters_obj) <- 'RNA'
b_clusters_obj <- NormalizeData(b_clusters_obj) %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA() %>%
RunHarmony(reduction = "pca",dims = 1:30,group.by.vars = "gemid",assay.use = "RNA",reduction.save = "harmony_RNA")
## Centering and scaling data matrix
## PC_ 1
## Positive: HMGB2, STMN1, MKI67, TUBA1B, H2AFZ, TOP2A, NUSAP1, HIST1H4C, TUBB, HIST1H1B
## TUBB4B, CENPF, HMGN2, CDK1, KIFC1, KIF22, TPX2, CCNB2, HIST1H3B, KNL1
## CDCA8, BIRC5, CCNB1, HJURP, NCAPD2, CDCA3, MCM7, SMC4, NCAPH, AURKB
## Negative: FCMR, CCR6, CAPG, SLC2A3, IFITM1, PLAC8, FCGR2B, ENTPD1, TNFRSF13B, FCRL5
## CD44, IFNGR1, TENT5C, CD69, SSR4, PDE4B, ZEB2, S1PR1, FCER2, CD83
## MAML2, FCRL4, COL19A1, RNASE6, PARP15, FCRL3, PDCD4, CCR7, CD24, GPR183
## PC_ 2
## Positive: PCNA, RRM2, MCM6, GINS2, MCM3, MCM7, MCM4, NME2, HELLS, CDC6
## FEN1, CLSPN, ATAD2, FABP5, DHFR, HSPD1, RFC2, PAICS, HSP90AB1, CHCHD2
## TK1, FAM111B, RPL8, HSPE1, CDC45, NME1, SIVA1, SERBP1, DTL, RFC4
## Negative: KIF20A, CENPE, CENPF, CCNB2, HMMR, ASPM, AURKA, CCNB1, ARL6IP1, KPNA2
## TOP2A, DEPDC1, DLGAP5, KIF23, KIF14, CDCA8, SGO2, KNSTRN, CKAP2, PSRC1
## HJURP, NEK2, BUB1, TPX2, PLK1, NUF2, ECT2, POLH, CKAP5, CDC20
## PC_ 3
## Positive: MTRNR2L12, MT-CO1, HIST1H1D, HIST1H1B, HIST1H3C, ACTB, HIST1H2AH, HIST1H3B, HIST1H4F, MTRNR2L8
## MCM3, HIST1H4D, HIST1H1E, HIST1H2BH, HIST1H2BF, HIST1H3F, TUBB, CD83, HIST1H4C, HIST2H2BF
## HIST1H2BL, HIST1H2AE, HIST1H2BJ, HIST1H2BM, MCM7, HIST1H2BB, HIST1H4L, FAM111B, CLSPN, WDR76
## Negative: MZB1, JCHAIN, DERL3, SSR4, HSP90B1, SEC11C, XBP1, IGHG1, TENT5C, FKBP11
## LINC02362, PPIB, HSPA5, IGKC, SSR3, SLAMF7, LMAN1, PRDX4, AC012236.1, SELENOK
## HYOU1, ADA2, IGHG3, IGHG4, PDIA4, IGLC2, SELENOS, NUCB2, IGHG2, ZBP1
## PC_ 4
## Positive: RPL8, ACTB, PTMA, CHCHD2, NME2, SERBP1, HLA-C, CDC20, CCT4, GPX4
## PGAM1, JPT1, LSM7, NDUFS7, PLK1, CRIP1, PLIN3, UBE2C, MLF2, HMGN2
## MRPS6, GCHFR, HSP90AB1, ID3, RUVBL2, NANS, MRPS12, MPC2, PTTG1, SIVA1
## Negative: HIST1H1C, HIST1H3C, IGHG1, HIST1H2BF, HIST1H1B, JCHAIN, HIST1H2AH, HIST1H3B, HIST1H2BJ, HIST1H2BC
## HIST1H1D, DERL3, HIST1H2BH, HIST1H4F, MZB1, HIST1H4D, HIST2H2BF, HIST1H3F, HIST1H2AE, HIST1H1E
## HIST1H2BL, HIST1H2BB, HIST1H3H, HIST1H4H, HIST1H2BN, HIST1H2BM, HIST1H4L, XBP1, TENT5C, HIST1H2AB
## PC_ 5
## Positive: FCRL4, TNFRSF13B, CD44, CAPG, ENTPD1, CCR6, SLC2A3, HLA-C, FCRL5, IFNGR1
## PLAC8, S100A4, SIGLEC6, PTPN1, ZEB2, ZBED2, CCR1, IFITM1, FCGR2B, ACP5
## SEMA4D, AHNAK, MYL9, PARP15, MYO1F, FLNA, PREX1, GPR183, GPR82, VIM
## Negative: SERPINA9, SUGCT, MME, GAPDH, JCHAIN, H2AFZ, RNF144B, PTTG1, AICDA, GINS2
## TCL6, HMGN2, SLC2A5, STMN1, LOXL2, NUGGC, PRDM15, NANS, MEF2B, CDCA7
## EML6, IFNG-AS1, CPM, LHFPL2, AC133065.1, DSTN, GCHFR, AC012236.1, NEIL1, CCDC144A
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony converged after 3 iterations
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.RNA.harmony_RNA; see ?make.names for more
## details on syntax validity
Check for Cell cycle stages variability between clusters
load(saved_cell_cycle_obj)
b_clusters_obj <- CellCycleScoring(b_clusters_obj,
g2m.features = g2m_genes,
s.features = s_genes)
DefaultAssay(b_clusters_obj) <- 'ADT'
# we will use all ADT features for dimensional reduction
# we set a dimensional reduction name to avoid overwriting the
VariableFeatures(b_clusters_obj) <- rownames(b_clusters_obj[["ADT"]])
b_clusters_obj <- NormalizeData(b_clusters_obj, normalization.method = 'CLR', margin = 2) %>%
ScaleData() %>% RunPCA(reduction.name = 'apca') %>%
RunHarmony(reduction = "apca",dims = 1:20, group.by.vars = "gemid", assay.use = "ADT", reduction.save = "harmony_protein")
## Normalizing across cells
## Centering and scaling data matrix
## PC_ 1
## Positive: HLA-DR, CD138-(Syndecan-1), IgA, CD90-(Thy1), CD20, NLRP2.1, CD38.1, IgG-Fc, 0856-anti-Tau-Phospho-(Thr181), 1055-anti-c-Met
## CD81-(TAPA-1), CD45, HLA-F.1, CD45RA, CD19.1, CD133, CD30, TCR-gamma-delta, CD11a, HLA-A-B-C
## CD193-(CCR3), CD47.1, CD309-(VEGFR2), CD15-(SSEA-1), CD10, CD11b, CD40.1, CD21, CD86.1, CD268-(BAFF-R)
## Negative: CD66b, CD274-(B7-H1--PD-L1), TIGIT-(VSTM3), CD197-(CCR7), CD23, IgG2a--kappa-isotype-Ctrl, CD223-(LAG-3), TCR-Valpha24-Jalpha18-(iNKT-cell), CD303-(BDCA-2), CD267-(TACI)
## CD96-(TACTILE), CD370-(CLEC9A-DNGR1), CD73-(Ecto-5-nucleotidase), CD169-(Sialoadhesin--Siglec-1), CD324-(E-Cadherin), CD106, Mac-2-(Galectin-3), TCR-Vbeta13.1, CD269-(BCMA), CD137-(4-1BB)
## CD144-(VE-Cadherin), CD49f, CD62L, CD122-(IL-2Rbeta), CD307d-(FcRL4), CD124-(IL-4Ralpha), CX3CR1.1, DR3-(TRAMP), CD366-(Tim-3), CD207.1
## PC_ 2
## Positive: CD45RA, CD19.1, CD21, CD45, HLA-DR, CD47.1, CD40.1, CD82.1, CD58-(LFA-3), CD268-(BAFF-R)
## CD99.1, TCR-gamma-delta, CD52.1, CD35, CD54, HLA-A-B-C, CD20, NLRP2.1, CD11a, CD38.1
## CD138-(Syndecan-1), CD360-(IL-21R), CD336-(NKp44), HLA-F.1, CD69.1, 1055-anti-c-Met, CD193-(CCR3), Podoplanin, CD22.1, CD81-(TAPA-1)
## Negative: CD66b, CD23, CD122-(IL-2Rbeta), CD274-(B7-H1--PD-L1), CD163.1, CD169-(Sialoadhesin--Siglec-1), CD267-(TACI), CD303-(BDCA-2), integrin-beta7, IgG2a--kappa-isotype-Ctrl
## CD370-(CLEC9A-DNGR1), CD56-(NCAM), TCR-alpha-beta, CD66a-c-e, CD235ab, CD57-Recombinant, CD34.1, CD204, CD16, FcepsilonRIalpha
## CD36.1, CD49f, CD197-(CCR7), HLA-A2, CD106, IgG2b--kappa-isotype-Ctrl, CD124-(IL-4Ralpha), CD206-(MMR), CD328-(Siglec-7), IgG2b--kappa-Isotype-Ctrl
## PC_ 3
## Positive: CD32, CD35, CD69.1, CD1c, CD82.1, CD73-(Ecto-5-nucleotidase), CD39, CD21, CD305-(LAIR1), CD47.1
## CD196-(CCR6), CD52.1, CD62L, IgD, CD268-(BAFF-R), CD99.1, CD54, CD19.1, CD5.1, CD44.1
## IgM, CD49d, CD31, CD45RA, CD71, CD22.1, CD2.1, CD18, CD45, CD278-(ICOS)
## Negative: CD90-(Thy1), CD138-(Syndecan-1), 0856-anti-Tau-Phospho-(Thr181), CD154, B7-H4, 1055-anti-c-Met, CD226-(DNAM-1), NLRP2.1, TCR-gamma-delta, HLA-F.1
## CD133, IgG-Fc, CD337-(NKp30), CD309-(VEGFR2), CD158f-(KIR2DL5), CD303-(BDCA-2), CD269-(BCMA), CD336-(NKp44), CD15-(SSEA-1), CD193-(CCR3)
## CD124-(IL-4Ralpha), CD169-(Sialoadhesin--Siglec-1), CD137L-(4-1BB-Ligand), XCR1.1, CD204, CD30, CD141-(Thrombomodulin), CD58-(LFA-3), CD144-(VE-Cadherin), CD178-(Fas-L)
## PC_ 4
## Positive: CD10, CD95-(Fas), CD81-(TAPA-1), CD38.1, CD71, CD2.1, CD3, CD278-(ICOS), CD4.1, CD20
## TCR-alpha-beta, CD54, CD146, CD7.1, CD5.1, CD27.1, CD45, CD45RO, HLA-A-B-C, CD279-(PD-1)
## CD49f, CD57-Recombinant, CD19.1, CD28.1, CD11a, CD224, CD40.1, CD58-(LFA-3), CD98, CD86.1
## Negative: IgD, IgM, CD305-(LAIR1), CD35, CD1c, CD196-(CCR6), CD32, CD73-(Ecto-5-nucleotidase), CD24.1, CD49d
## CD69.1, CD44.1, Ig-light-chain-lambda, TCR-gamma-delta, CD90-(Thy1), CD31, CD138-(Syndecan-1), Ig-light-chain-kappa, CD11b, 0856-anti-Tau-Phospho-(Thr181)
## CD21, 1055-anti-c-Met, CD39, CD268-(BAFF-R), CD23, integrin-beta7, CD133, NLRP2.1, CD45RA, HLA-F.1
## PC_ 5
## Positive: CD5.1, CD2.1, CD3, CD4.1, CD7.1, TCR-alpha-beta, CD278-(ICOS), CD28.1, CD279-(PD-1), CD45RO
## CD138-(Syndecan-1), CD90-(Thy1), CD69.1, 0856-anti-Tau-Phospho-(Thr181), CD49f, 1055-anti-c-Met, NLRP2.1, CD44.1, CD27.1, CD18
## CD226-(DNAM-1), HLA-F.1, CD133, CD62L, CD99.1, CD11b, TCR-gamma-delta, CD15-(SSEA-1), CD30, CD309-(VEGFR2)
## Negative: CD10, CD71, CD20, CD40.1, CD81-(TAPA-1), CD19.1, CD54, CD38.1, CD21, HLA-A-B-C
## CD268-(BAFF-R), IgG2a--kappa-isotype-Ctrl, CD86.1, CD66b, HLA-DR, CD146, CD80.1, CD197-(CCR7), CD124-(IL-4Ralpha), CD274-(B7-H1--PD-L1)
## CD370-(CLEC9A-DNGR1), CD360-(IL-21R), CD58-(LFA-3), Podoplanin, CD22.1, CD23, CD45RA, CD106, CD107a-(LAMP-1), CD184-(CXCR4)
## Warning: Cannot add objects with duplicate keys (offending key: PC_) setting key
## to original value 'apca_'
## Harmony 1/10
## Harmony 2/10
## Harmony converged after 2 iterations
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.ADT.harmony_protein; see ?make.names for more
## details on syntax validity
Multimodal Neighbor Identification
# Identify multimodal neighbors. These will be stored in the neighbors slot,
# and can be accessed using filtered_bcll.combined[['weighted.nn']]
# The WNN graph can be accessed at filtered_bcll.combined[["wknn"]],
# and the SNN graph used for clustering at filtered_bcll.combined[["wsnn"]]
# Cell-specific modality weights can be accessed at filtered_bcll.combined$RNA.weight
b_clusters_obj <- FindMultiModalNeighbors(
b_clusters_obj, reduction.list = list("harmony_RNA", "harmony_protein"),
dims.list = list(1:30, 1:20), modality.weight.name = "RNA.weight"
)
## Calculating cell-specific modality weights
## Finding 20 nearest neighbors for each modality.
## Calculating kernel bandwidths
## Warning in FindMultiModalNeighbors(b_clusters_obj, reduction.list =
## list("harmony_RNA", : The number of provided modality.weight.name is not
## equal to the number of modalities. RNA.weight ADT.weight are used to store the
## modality weights
## Finding multimodal neighbors
## Constructing multimodal KNN graph
## Constructing multimodal SNN graph
UMAP visualisation
feat_clust <- c(0.1,0.5,1,1.5,2)
b_clusters_obj <- RunUMAP(b_clusters_obj, nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnn_UMAP_")
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 17:37:52 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:37:52 Commencing smooth kNN distance calibration using 1 thread
## 17:37:53 Initializing from normalized Laplacian + noise
## 17:37:53 Commencing optimization for 200 epochs, with 1023726 positive edges
## 17:38:05 Optimization finished
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from wnn_UMAP_ to wnnUMAP_
## Warning: All keys should be one or more alphanumeric characters followed by an
## underscore '_', setting key to wnnUMAP_
b_clusters_obj <- FindClusters(b_clusters_obj, graph.name = "wsnn", algorithm = 3, resolution = feat_clust, verbose = FALSE)
Seurat::DimPlot(b_clusters_obj, reduction = 'wnn.umap', label = TRUE, repel = TRUE, label.size = 2.5, group.by = c("wsnn_res.0.1","wsnn_res.0.5","wsnn_res.1","wsnn_res.1.5","wsnn_res.2")) + NoLegend() & Seurat::NoLegend()

b_clusters_obj <- FindClusters(b_clusters_obj, graph.name = "wsnn", algorithm = 3, resolution = 0.5, verbose = FALSE)
b_clusters_obj <- RunUMAP(b_clusters_obj, reduction = 'pca', dims = 1:30, assay = 'RNA',
reduction.name = 'rna.umap', reduction.key = 'rnaUMAP_')
## 17:40:02 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:40:02 Read 31039 rows and found 30 numeric columns
## 17:40:02 Using Annoy for neighbor search, n_neighbors = 30
## 17:40:02 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:40:05 Writing NN index file to temp file /tmp/Rtmp8r9LPm/file10609775303375
## 17:40:05 Searching Annoy index using 1 thread, search_k = 3000
## 17:40:13 Annoy recall = 100%
## 17:40:13 Commencing smooth kNN distance calibration using 1 thread
## 17:40:14 Initializing from normalized Laplacian + noise
## 17:40:14 Commencing optimization for 200 epochs, with 1395586 positive edges
## 17:40:25 Optimization finished
b_clusters_obj <- RunUMAP(b_clusters_obj, reduction = 'apca', dims = 1:18, assay = 'ADT',
reduction.name = 'adt.umap', reduction.key = 'adtUMAP_')
## 17:40:25 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:40:25 Read 31039 rows and found 18 numeric columns
## 17:40:25 Using Annoy for neighbor search, n_neighbors = 30
## 17:40:25 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:40:27 Writing NN index file to temp file /tmp/Rtmp8r9LPm/file10609748365496
## 17:40:27 Searching Annoy index using 1 thread, search_k = 3000
## 17:40:35 Annoy recall = 100%
## 17:40:35 Commencing smooth kNN distance calibration using 1 thread
## 17:40:36 Initializing from normalized Laplacian + noise
## 17:40:36 Commencing optimization for 200 epochs, with 1350972 positive edges
## 17:40:46 Optimization finished
p1 <- DimPlot(b_clusters_obj, reduction = 'rna.umap', label = TRUE,
repel = TRUE, label.size = 2.5) + NoLegend()
p2 <- DimPlot(b_clusters_obj, reduction = 'adt.umap', label = TRUE,
repel = TRUE, label.size = 2.5) + NoLegend()
p1

p2

Seurat::DimPlot(b_clusters_obj, reduction = 'wnn.umap', label = TRUE, repel = TRUE, label.size = 2.5, group.by = "wsnn_res.0.5")

metadata <- b_clusters_obj@meta.data
df = count(metadata,metadata$wsnn_res.0.5)
colnames(df) = c("Cluster","Number_of_cells")
library(knitr)
library(kableExtra)
kable(df) %>%
kable_styling("striped", full_width = T)
|
Cluster
|
Number_of_cells
|
|
0
|
6533
|
|
1
|
4824
|
|
10
|
1289
|
|
11
|
496
|
|
12
|
289
|
|
13
|
271
|
|
14
|
94
|
|
2
|
4344
|
|
3
|
2805
|
|
4
|
2174
|
|
5
|
1935
|
|
6
|
1890
|
|
7
|
1448
|
|
8
|
1357
|
|
9
|
1290
|